{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "采用特定头部的注意力权重，并动态地结合TF-IDF，可以进一步优化主题关键词的提取。特定头部的注意力权重可以捕捉不同的语义信息，而动态结合TF-IDF则能适应不同文档的特性，确保提取的关键词更加准确。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "缺点：分词有问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import jieba\n",
    "import pandas as pd \n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from transformers import AutoModel, AutoTokenizer\n",
    "import torch\n",
    "from sklearn.decomposition import PCA\n",
    "import matplotlib.pyplot as plt "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/py311/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    }
   ],
   "source": [
    "# 加载预训练的Bert模型和分词器\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-chinese\")\n",
    "model = AutoModel.from_pretrained(\"bert-base-chinese\", output_attentions=True)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据准备：实例文档\n",
    "documents = [\n",
    "    \"机器学习是一个非常有趣的领域。\",\n",
    "    \"人工智能是未来的发展方向。\",\n",
    "    \"今天天气很好，适合出去散步。\",\n",
    "    \"我喜欢在周末去爬山。\",\n",
    "    \"深度学习模型需要大量的数据。\",\n",
    "    \"自然语言处理是人工智能的重要组成部分。\"\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tokenize_documents(docs):\n",
    "    tokenized_docs = [list(jieba.cut(doc)) for doc in docs]\n",
    "    return tokenized_docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache /tmp/jieba.cache\n",
      "Loading model cost 0.730 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[['机器', '学习', '是', '一个', '非常', '有趣', '的', '领域', '。'],\n",
       " ['人工智能', '是', '未来', '的', '发展', '方向', '。'],\n",
       " ['今天天气', '很', '好', '，', '适合', '出去', '散步', '。'],\n",
       " ['我', '喜欢', '在', '周末', '去', '爬山', '。'],\n",
       " ['深度', '学习', '模型', '需要', '大量', '的', '数据', '。'],\n",
       " ['自然语言', '处理', '是', '人工智能', '的', '重要', '组成部分', '。']]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenized_documents = tokenize_documents(documents)\n",
    "tokenized_documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([' ', '。', '一', '个', '习', '人', '今', '出', '分', '去', '发', '合', '向',\n",
       "       '周', '喜', '器', '在', '型', '域', '处', '大', '天', '好', '学', '展', '山',\n",
       "       '工', '常', '度', '很', '成', '我', '据', '散', '数', '方', '是', '智', '有',\n",
       "       '未', '末', '机', '来', '模', '欢', '步', '气', '深', '然', '爬', '理', '的',\n",
       "       '组', '能', '自', '要', '言', '语', '趣', '适', '部', '重', '量', '需', '非',\n",
       "       '领', '，'], dtype=object)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算TF-IDF权重\n",
    "vectorizer = TfidfVectorizer(tokenizer=lambda x: x, preprocessor=lambda x: x, token_pattern=None)\n",
    "tfidf_matrix = vectorizer.fit_transform([\" \".join(doc) for doc in tokenized_documents])\n",
    "feature_names = vectorizer.get_feature_names_out()\n",
    "feature_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 动态调整结合权重的参数\n",
    "def dynamic_weight_adjustment(tfidf_scores):\n",
    "    avg_tfidf = np.mean(list(tfidf_scores.values()))\n",
    "    return 0.5 + avg_tfidf * 0.5  # 简单示例，权重范围 [0.5, 1.0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_attention_keywords(tokens, tfidf_weights, top_k=3, selected_head=0, dynamic_factor=True):\n",
    "    # 将分词后的文本重新拼接为BERT输入的格式\n",
    "    inputs = tokenizer(tokens, return_tensors='pt', is_split_into_words=True, truncation=True, padding=True, max_length=128)\n",
    "    outputs = model(**inputs)\n",
    "    \n",
    "    # 获取所有层的注意力权重\n",
    "    attentions = outputs.attentions  # [layers, batch, heads, seq_len, seq_len]\n",
    "\n",
    "    # 初始化一个字典来存储词语的特定头部注意力得分\n",
    "    combined_attention_scores = np.zeros(inputs['input_ids'].shape[1])\n",
    "\n",
    "    # 选择特定头部的注意力权重（例如第一个头部）\n",
    "    for layer_attention in attentions:\n",
    "        cls_attention_head = layer_attention[0, selected_head, 0, :]  # [seq_len]\n",
    "        combined_attention_scores += cls_attention_head.detach().numpy()\n",
    "\n",
    "    # 将注意力权重映射回词语\n",
    "    token_scores = {}\n",
    "    for idx, token in enumerate(tokens):\n",
    "        tfidf_score = tfidf_weights.get(token, 0)\n",
    "        attention_score = combined_attention_scores[idx]\n",
    "        token_scores[token] = attention_score*tfidf_score\n",
    "\n",
    "    if dynamic_factor:\n",
    "        tfidf_weight_factor = dynamic_weight_adjustment(tfidf_weights)\n",
    "        token_score = {k: v * tfidf_weight_factor for k, v in token_scores.items()}\n",
    "\n",
    "    sorted_tokens = sorted(token_scores.items(), key=lambda x: x[1], reverse=True)\n",
    "\n",
    "    # 提取前top_k个关键词\n",
    "    keywords = [token for token, score in sorted_tokens[:top_k]]\n",
    "    \n",
    "    return keywords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "PreTokenizedEncodeInput must be Union[PreTokenizedInputSequence, Tuple[PreTokenizedInputSequence, PreTokenizedInputSequence]]",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[8], line 5\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, doc \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(documents):\n\u001b[1;32m      3\u001b[0m     \u001b[38;5;66;03m# 获取当前文档的TF-IDF权重\u001b[39;00m\n\u001b[1;32m      4\u001b[0m     tfidf_scores \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(feature_names, tfidf_matrix[i]\u001b[38;5;241m.\u001b[39mtoarray()\u001b[38;5;241m.\u001b[39mflatten()))\n\u001b[0;32m----> 5\u001b[0m     keywords \u001b[38;5;241m=\u001b[39m \u001b[43mextract_attention_keywords\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdoc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtfidf_scores\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mselected_head\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      6\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m文档: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdoc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m提取的主题关键词: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkeywords\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
      "Cell \u001b[0;32mIn[7], line 3\u001b[0m, in \u001b[0;36mextract_attention_keywords\u001b[0;34m(tokens, tfidf_weights, top_k, selected_head, dynamic_factor)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mextract_attention_keywords\u001b[39m(tokens, tfidf_weights, top_k\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, selected_head\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, dynamic_factor\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;66;03m# 将分词后的文本重新拼接为BERT输入的格式\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m     inputs \u001b[38;5;241m=\u001b[39m \u001b[43mtokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtokens\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpt\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mis_split_into_words\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtruncation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpadding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m128\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      4\u001b[0m     outputs \u001b[38;5;241m=\u001b[39m model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs)\n\u001b[1;32m      6\u001b[0m     \u001b[38;5;66;03m# 获取所有层的注意力权重\u001b[39;00m\n",
      "File \u001b[0;32m~/miniconda3/envs/py311/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:3055\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.__call__\u001b[0;34m(self, text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)\u001b[0m\n\u001b[1;32m   3053\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_in_target_context_manager:\n\u001b[1;32m   3054\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_switch_to_input_mode()\n\u001b[0;32m-> 3055\u001b[0m     encodings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_one\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext_pair\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext_pair\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mall_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3056\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m text_target \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   3057\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_switch_to_target_mode()\n",
      "File \u001b[0;32m~/miniconda3/envs/py311/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:3163\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase._call_one\u001b[0;34m(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, split_special_tokens, **kwargs)\u001b[0m\n\u001b[1;32m   3142\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbatch_encode_plus(\n\u001b[1;32m   3143\u001b[0m         batch_text_or_text_pairs\u001b[38;5;241m=\u001b[39mbatch_text_or_text_pairs,\n\u001b[1;32m   3144\u001b[0m         add_special_tokens\u001b[38;5;241m=\u001b[39madd_special_tokens,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   3160\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m   3161\u001b[0m     )\n\u001b[1;32m   3162\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 3163\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode_plus\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   3164\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3165\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtext_pair\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext_pair\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3166\u001b[0m \u001b[43m        \u001b[49m\u001b[43madd_special_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_special_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3167\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpadding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3168\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtruncation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtruncation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3169\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3170\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstride\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3171\u001b[0m \u001b[43m        \u001b[49m\u001b[43mis_split_into_words\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_split_into_words\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3172\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpad_to_multiple_of\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpad_to_multiple_of\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3173\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3174\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_token_type_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_token_type_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3175\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_attention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_attention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3176\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_overflowing_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_overflowing_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3177\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_special_tokens_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_special_tokens_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3178\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_offsets_mapping\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_offsets_mapping\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3179\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3180\u001b[0m \u001b[43m        \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3181\u001b[0m \u001b[43m        \u001b[49m\u001b[43msplit_special_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msplit_special_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3182\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3183\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/py311/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:3237\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.encode_plus\u001b[0;34m(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)\u001b[0m\n\u001b[1;32m   3227\u001b[0m \u001b[38;5;66;03m# Backward compatibility for 'truncation_strategy', 'pad_to_max_length'\u001b[39;00m\n\u001b[1;32m   3228\u001b[0m padding_strategy, truncation_strategy, max_length, kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_padding_truncation_strategies(\n\u001b[1;32m   3229\u001b[0m     padding\u001b[38;5;241m=\u001b[39mpadding,\n\u001b[1;32m   3230\u001b[0m     truncation\u001b[38;5;241m=\u001b[39mtruncation,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   3234\u001b[0m     \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m   3235\u001b[0m )\n\u001b[0;32m-> 3237\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_encode_plus\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   3238\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3239\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtext_pair\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext_pair\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3240\u001b[0m \u001b[43m    \u001b[49m\u001b[43madd_special_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_special_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3241\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpadding_strategy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpadding_strategy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3242\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtruncation_strategy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtruncation_strategy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3243\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3244\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstride\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3245\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_split_into_words\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_split_into_words\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3246\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpad_to_multiple_of\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpad_to_multiple_of\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3247\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3248\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_token_type_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_token_type_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3249\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_attention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_attention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3250\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_overflowing_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_overflowing_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3251\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_special_tokens_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_special_tokens_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3252\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_offsets_mapping\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_offsets_mapping\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3253\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3254\u001b[0m \u001b[43m    \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3255\u001b[0m \u001b[43m    \u001b[49m\u001b[43msplit_special_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpop\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msplit_special_tokens\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit_special_tokens\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3256\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3257\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/envs/py311/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:601\u001b[0m, in \u001b[0;36mPreTrainedTokenizerFast._encode_plus\u001b[0;34m(self, text, text_pair, add_special_tokens, padding_strategy, truncation_strategy, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, split_special_tokens, **kwargs)\u001b[0m\n\u001b[1;32m    578\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_encode_plus\u001b[39m(\n\u001b[1;32m    579\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m    580\u001b[0m     text: Union[TextInput, PreTokenizedInput],\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    598\u001b[0m     \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m    599\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m BatchEncoding:\n\u001b[1;32m    600\u001b[0m     batched_input \u001b[38;5;241m=\u001b[39m [(text, text_pair)] \u001b[38;5;28;01mif\u001b[39;00m text_pair \u001b[38;5;28;01melse\u001b[39;00m [text]\n\u001b[0;32m--> 601\u001b[0m     batched_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_batch_encode_plus\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    602\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbatched_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    603\u001b[0m \u001b[43m        \u001b[49m\u001b[43mis_split_into_words\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_split_into_words\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    604\u001b[0m \u001b[43m        \u001b[49m\u001b[43madd_special_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_special_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    605\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpadding_strategy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpadding_strategy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    606\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtruncation_strategy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtruncation_strategy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    607\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    608\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstride\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    609\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpad_to_multiple_of\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpad_to_multiple_of\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    610\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    611\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_token_type_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_token_type_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    612\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_attention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_attention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    613\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_overflowing_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_overflowing_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    614\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_special_tokens_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_special_tokens_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    615\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_offsets_mapping\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_offsets_mapping\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    616\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_length\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_length\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    617\u001b[0m \u001b[43m        \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    618\u001b[0m \u001b[43m        \u001b[49m\u001b[43msplit_special_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msplit_special_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    619\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    620\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    622\u001b[0m     \u001b[38;5;66;03m# Return tensor is None, then we can remove the leading batch axis\u001b[39;00m\n\u001b[1;32m    623\u001b[0m     \u001b[38;5;66;03m# Overflowing tokens are returned as a batch of output so we keep them in this case\u001b[39;00m\n\u001b[1;32m    624\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m return_tensors \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m return_overflowing_tokens:\n",
      "File \u001b[0;32m~/miniconda3/envs/py311/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py:528\u001b[0m, in \u001b[0;36mPreTrainedTokenizerFast._batch_encode_plus\u001b[0;34m(self, batch_text_or_text_pairs, add_special_tokens, padding_strategy, truncation_strategy, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, split_special_tokens)\u001b[0m\n\u001b[1;32m    525\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tokenizer\u001b[38;5;241m.\u001b[39mencode_special_tokens \u001b[38;5;241m!=\u001b[39m split_special_tokens:\n\u001b[1;32m    526\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tokenizer\u001b[38;5;241m.\u001b[39mencode_special_tokens \u001b[38;5;241m=\u001b[39m split_special_tokens\n\u001b[0;32m--> 528\u001b[0m encodings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_tokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode_batch\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    529\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbatch_text_or_text_pairs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    530\u001b[0m \u001b[43m    \u001b[49m\u001b[43madd_special_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_special_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    531\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_pretokenized\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_split_into_words\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    532\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    534\u001b[0m \u001b[38;5;66;03m# Convert encoding to dict\u001b[39;00m\n\u001b[1;32m    535\u001b[0m \u001b[38;5;66;03m# `Tokens` has type: Tuple[\u001b[39;00m\n\u001b[1;32m    536\u001b[0m \u001b[38;5;66;03m#                       List[Dict[str, List[List[int]]]] or List[Dict[str, 2D-Tensor]],\u001b[39;00m\n\u001b[1;32m    537\u001b[0m \u001b[38;5;66;03m#                       List[EncodingFast]\u001b[39;00m\n\u001b[1;32m    538\u001b[0m \u001b[38;5;66;03m#                    ]\u001b[39;00m\n\u001b[1;32m    539\u001b[0m \u001b[38;5;66;03m# with nested dimensions corresponding to batch, overflows, sequence length\u001b[39;00m\n\u001b[1;32m    540\u001b[0m tokens_and_encodings \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    541\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_convert_encoding(\n\u001b[1;32m    542\u001b[0m         encoding\u001b[38;5;241m=\u001b[39mencoding,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    551\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m encoding \u001b[38;5;129;01min\u001b[39;00m encodings\n\u001b[1;32m    552\u001b[0m ]\n",
      "\u001b[0;31mTypeError\u001b[0m: PreTokenizedEncodeInput must be Union[PreTokenizedInputSequence, Tuple[PreTokenizedInputSequence, PreTokenizedInputSequence]]"
     ]
    }
   ],
   "source": [
    "# 处理所有文档，提取主题关键词\n",
    "for i, doc in enumerate(documents):\n",
    "    # 获取当前文档的TF-IDF权重\n",
    "    tfidf_scores = dict(zip(feature_names, tfidf_matrix[i].toarray().flatten()))\n",
    "    keywords = extract_attention_keywords(doc, tfidf_scores, selected_head=0)\n",
    "    print(f\"文档: {doc}\\n提取的主题关键词: {keywords}\\n\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py311",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
